UTF-8の求め方 ビット演算の練習問題
UTF-8の求め方
1. ^(U+005E / 0x0000から0x007F)
答えは0x5E
CodePointの確認
code:js
'^'.codePointAt(0).toString(16)
//'5e'
'^'.codePointAt(0).toString(10)
//'94'
'^'.codePointAt(0).toString(2)
//'1011110'
求め方に当てはめるとこうなる
code:js
// 00000000
//| 1011110
//----------
// 01011110 => 5eになるはず
ビット演算を使って求める
code:js
('^'.codePointAt(0) | 0b00000000).toString(2)
//'1011110'
結果
code:js
[
('^'.codePointAt(0) | 0b00000000)
]
// 94 => '5e'
2. Ӑ(U+04D0 / 0x0080から0x07FF)
答えは0xD390
CodePointの確認
code:js
'Ӑ'.codePointAt(0).toString(16)
//'4d0'
'Ӑ'.codePointAt(0).toString(10)
//'1232'
'Ӑ'.codePointAt(0).toString(2)
//'10011010000'
求め方に当てはめるとこうなる
code:js
// 11000000 10000000
//|   10011   010000
//---------------------
// 11010011 10010000 => d390になるはず
ビット演算を使って求める
code:js
//下6桁だけほしいから、下6桁だけが1のマスク(0b00111111)とのビット論理積をとる
(('Ӑ'.codePointAt(0) & 0b00111111)).toString(2)
//'10000'
//これと10000000のビット論理和をとる
(('Ӑ'.codePointAt(0) & 0b00111111) | 0b10000000).toString(2)
//'10010000'
//16進数だと0x90
(('Ӑ'.codePointAt(0) & 0b00111111) | 0b10000000).toString(16)
//'90'
//次の5桁、使った6ビット分ずらす
('Ӑ'.codePointAt(0) >> 6).toString(2)
//'10011'
//下5桁だけを残すマスク(0b00011111)をかけてから、11000000とのビット論理和をとる
((('Ӑ'.codePointAt(0) >> 6) & 0b00011111) | 0b11000000).toString(2)
//'11010011'
//16進数だと0xd3
((('Ӑ'.codePointAt(0) >> 6) & 0b00011111) | 0b11000000).toString(16)
//'d3'
結果
code:js
[
((('Ӑ'.codePointAt(0) >> 6) & 0b00011111) | 0b11000000),
(('Ӑ'.codePointAt(0) & 0b00111111) | 0b10000000)
]
//(2) 211, 144 => 'd3', '90'
3. ︰(U+FE30 / 0x0800から0xFFFF)
答えは0xEFB8B0
CodePointの確認
code:js
'︰'.codePointAt(0).toString(16)
//'fe30'
'︰'.codePointAt(0).toString(10)
//'65072'
'︰'.codePointAt(0).toString(2)
//'1111111000110000'
求め方に当てはめるとこうなる
code:js
// 11100000 10000000 10000000
//|    1111   111000   110000
//----------------------------
// 11101111 10111000 10110000 => efb8b0になるはず
ビット演算を使って求める
code:js
//下6桁
('︰'.codePointAt(0) & 0b00111111).toString(2)
//'110000'
(('︰'.codePointAt(0) & 0b00111111) | 0b10000000).toString(2)
//'10110000'
(('︰'.codePointAt(0) & 0b00111111) | 0b10000000).toString(16)
//'b0'
//次6桁
(('︰'.codePointAt(0) >> 6) & 0b00111111).toString(2)
//'111000'
((('︰'.codePointAt(0) >> 6) & 0b00111111) | 0b10000000).toString(2)
//'10111000'
((('︰'.codePointAt(0) >> 6) & 0b00111111) | 0b10000000).toString(16)
//'b8'
//次4桁
(('︰'.codePointAt(0) >> 12) & 0b00001111).toString(2)
//'1111'
((('︰'.codePointAt(0) >> 12) & 0b00001111) | 0b11100000).toString(2)
//'11101111'
((('︰'.codePointAt(0) >> 12) & 0b00001111) | 0b11100000).toString(16)
//'ef'
結果
code:js
[
((('︰'.codePointAt(0) >> 12) & 0b00001111) | 0b11100000),
((('︰'.codePointAt(0) >> 6) & 0b00111111) | 0b10000000),
(('︰'.codePointAt(0) & 0b00111111) | 0b10000000)
]
//(3) 239, 184, 176 => 'ef', 'b8', 'b0'
4. 🀄(U+1F004 / 0x10000から0x10FFFF)
答えは0xF09F8084
CodePointの確認
code:js
'🀄'.codePointAt(0).toString(16)
//'1f004'
'🀄'.codePointAt(0).toString(10)
//'126980'
'🀄'.codePointAt(0).toString(2)
//'11111000000000100'
求め方に当てはめるとこうなる
code:js
// 11110000 10000000 10000000 10000000
//|     000   011111   000000   000100
//-------------------------------------
// 11110000 10011111 10000000 10000100 => f09f8084になるはず
ビット演算を使って求める
code:js
//下6桁
('🀄'.codePointAt(0) & 0b00111111).toString(2)
//'100'
(('🀄'.codePointAt(0) & 0b00111111) | 0b10000000).toString(2)
//'10000100'
(('🀄'.codePointAt(0) & 0b00111111) | 0b10000000).toString(16)
//'84'
//次6桁
(('🀄'.codePointAt(0) >> 6) & 0b00111111).toString(2)
//'0'
((('🀄'.codePointAt(0) >> 6) & 0b00111111) | 0b10000000).toString(2)
//'10000000'
((('🀄'.codePointAt(0) >> 6) & 0b00111111) | 0b10000000).toString(16)
//'80'
//次6桁
(('🀄'.codePointAt(0) >> 12) & 0b00111111).toString(2)
//'11111'
((('🀄'.codePointAt(0) >> 12) & 0b00111111) | 0b10000000).toString(2)
//'10011111'
((('🀄'.codePointAt(0) >> 12) & 0b00111111) | 0b10000000).toString(16)
//'9f'
//次3桁
(('🀄'.codePointAt(0) >> 18) & 0b00000111).toString(2)
//'0'
((('🀄'.codePointAt(0) >> 18) & 0b00000111) | 0b11110000).toString(2)
//'11110000'
((('🀄'.codePointAt(0) >> 18) & 0b00000111) | 0b11110000).toString(16)
//'f0'
結果
code:js
[
((('🀄'.codePointAt(0) >> 18) & 0b00000111) | 0b11110000),
((('🀄'.codePointAt(0) >> 12) & 0b00111111) | 0b10000000),
((('🀄'.codePointAt(0) >> 6) & 0b00111111) | 0b10000000),
(('🀄'.codePointAt(0) & 0b00111111) | 0b10000000)
]
//(4) 240, 159, 128, 132 => 'f0', '9f', '80', '84'
ほっけ
code:js
'𩸽'.codePointAt(0).toString(2)
// '101001111000111101'
// 0b10_1001_1110_0011_1101
'𩸽'.codePointAt(0)
// 171581
'𩸽'.codePointAt(0).toString(16)
// '29e3d'
[
('𩸽'.codePointAt(0) & 0b0001_1100_0000_0000_0000_0000) >> 18 | 0b1111_0000,
('𩸽'.codePointAt(0) & 0b0000_0011_1111_0000_0000_0000) >> 12 | 0b1000_0000,
('𩸽'.codePointAt(0) & 0b0000_0000_0000_1111_1100_0000 ) >> 6 | 0b1000_0000,
('𩸽'.codePointAt(0) & 0b0000_0000_0000_0000_0011_1111 ) | 0b1000_0000
]
// (4) 240, 169, 184, 189
code:1
0b0000_0010_1001_1110_0011_1101 ... 171581 (0x029e3d)
& 0b0001_1100_0000_0000_0000_0000 ... 1835008(0x1c0000)との論理積をとる
---------------------------------
0b0000_0000_0000_0000_0000_0000 ... 0(0x00)
>> 18 ... 18ビット右にシフトする
---------------------------------
0b0000_0000 ... 0 (0x00)
| 0b1111_0000 ... 240(0xf0)との論理和をとる
---------------------------------
0b1111_0000 ... 240(0xf0)
code:2
0b0000_0010_1001_1110_0011_1101 ... 171581(0x29e3d)
& 0b0000_0011_1111_0000_0000_0000 ... 258048(0x3f000)との論理積をとる
---------------------------------
0b0000_0010_1001_0000_0000_0000 ... 167936(0x29000)
>> 12 ... 12ビット右にシフトする
---------------------------------
0b0010_1001 ... 41 (0x29)
| 0b1000_0000 ... 128(0x80)との論理和をとる
---------------------------------
0b1010_1001 ... 169(0xa9)
code:3
0b0000_0010_1001_1110_0011_1101 ... 171581(0x29e3d)
& 0b0000_0000_0000_1111_1100_0000 ... 4032 (0x00fc0)との論理積をとる
---------------------------------
0b0000_0000_0000_1110_0000_0000 ... 3584(0xe00)
>> 6 ... 6ビット右にシフトする
---------------------------------
0b0011_1000 ... 56 (0x38)
| 0b1000_0000 ... 128(0x80)との論理和をとる
---------------------------------
0b1011_1000 ... 184(0xb8)
code:4
0b0000_0010_1001_1110_0011_1101 ... 171581(0x29e3d)
& 0b0000_0000_0000_0000_0011_1111 ... 63 (0x0003f)との論理積をとる
---------------------------------
0b0011_1101 ... 61 (0x3d) ※ シフトはしない
| 0b1000_0000 ... 128(0x80)との論理和をとる
---------------------------------
0b1011_1101 ... 189(0xbd)